#!/usr/bin/env perl

# UTF8 boilerplace, per http://stackoverflow.com/questions/6162484/why-does-modern-perl-avoid-utf-8-by-default/
use v5.14;
use strict;
use warnings;
use warnings qw( FATAL utf8 );
use utf8;
use open qw( :std :utf8 );
use Unicode::Normalize qw( NFC );
use Unicode::Collate;
use Encode qw( decode );

if ( grep /\P{ASCII}/ => @ARGV ) {
    @ARGV = map { decode( 'UTF-8', $_ ) } @ARGV;
}

# If there is __DATA__,then uncomment next line:
# binmode( DATA, ':encoding(UTF-8)' );
# UTF8 boilerplace, per http://stackoverflow.com/questions/6162484/why-does-modern-perl-avoid-utf-8-by-default/

# Useful common code
use autodie;
use Carp qw( carp croak confess cluck );
use English qw( -no_match_vars );
use Data::Dumper qw( Dumper );

# give a full stack dump on any untrapped exceptions
local $SIG{ __DIE__ } = sub {
    confess "Uncaught exception: @_" unless $^S;
};

# now promote run-time warnings into stackdumped exceptions
#   *unless* we're in an try block, in which
#   case just generate a clucking stackdump instead
local $SIG{ __WARN__ } = sub {
    if   ( $^S ) { cluck "Trapped warning: @_" }
    else         { confess "Deadly warning: @_" }
};

# Useful common code

use DBI;
use Parallel::ForkManager;
use List::Util qw( any );
use Sys::Info;
use Pg::Explain;
use File::Temp qw( tempdir );

my $partitions = get_list_of_partitions();
my $cpus       = Sys::Info->new->device( 'CPU' )->count;
my $output     = tempdir( 'interesting-explains.XXXXXX', TMPDIR => 1 );
printf "Processing %d partitions in %d workers, output goes to %s\n", scalar @{ $partitions }, $cpus, $output;

my $pm = Parallel::ForkManager->new( $cpus );

PARTITION:
for my $partno ( 0 .. $#{ $partitions } ) {
    my $part = $partitions->[ $partno ];
    $pm->start and next PARTITION;
    printf "Starting partition #%d\n", 1 + $partno;

    open my $out, '>', "${output}/${part}.txt";

    my $dbh = get_dbh();

    my $seen        = 0;
    my $errors      = 0;
    my $interesting = 0;

    my $prev_id = '';
    my $query   = sprintf "select id, plan FROM %s WHERE is_public and NOT is_deleted and NOT is_anonymized and plan ~ 'actual time=[0-9]' AND id > ? order by ID ASC LIMIT 100", $part;
    while ( 1 ) {
        my $rows = $dbh->selectall_arrayref( $query, { 'Slice' => {} }, $prev_id );
        last if 0 == scalar @{ $rows };
        $prev_id = $rows->[ -1 ]->{ 'id' };
        for my $row ( @{ $rows } ) {
            $seen++;
            my $explain;
            eval {
                $explain = Pg::Explain->new( 'source' => $row->{ 'plan' } );
                $explain->parse_source();
            };
            if ( $EVAL_ERROR ) {
                $errors++;
                next;
            }
            next unless $explain->top_node;

            my $is_interesting = 0;

            for my $line ( @{ output_lines( $row->{ 'id' }, $explain ) } ) {
                print $out join( "\t", @{ $line } ) . "\n";
                $is_interesting = 1;
            }

            $interesting++ if $is_interesting;
        }
    }

    $dbh->rollback();
    $dbh->disconnect();
    close $out;
    printf "Partition %d done. %d plans scanned, %d errored out, %d interesting saved.\n", 1 + $partno, $seen, $errors, $interesting;
    $pm->finish;
}
$pm->wait_all_children;

printf "All done, output in %s\n", $output;

exit;

sub output_lines {
    my ( $id, $plan ) = @_;
    my $ret      = [];
    my $plan_len = length( $plan->source );
    for my $node ( $plan->top_node, $plan->top_node->all_recursive_subnodes ) {

        next unless $node->estimated_row_width;
        next unless $node->total_rows;
        next unless $node->total_rows_removed;
        next unless $node->extra_info;

        # At least 3 pages worth of data is returned
        next unless ( $node->total_rows + $node->total_rows_removed ) * $node->estimated_row_width > 3 * 8192;

        # At least 90% of rows were removed
        next unless $node->total_rows_removed > 2 * $node->total_rows;

        my @filter_lines = grep { /^Filter:/ } @{ $node->extra_info };

        # There are filter expressions
        next if 0 == scalar @filter_lines;

        push @{ $ret }, map { [ $id, $plan_len, $node->type, $_ ] } @filter_lines;
    }
    return $ret;
}

sub get_dbh {
    my $dsn = sprintf 'dbi:Pg:dbname=%s', $ENV{ 'PGDATABASE' } || 'depesz_explain';
    $dsn .= sprintf ';host=%s', $ENV{ 'PGHOST' } if $ENV{ 'PGHOST' };
    $dsn .= sprintf ';port=%s', $ENV{ 'PGPORT' } if $ENV{ 'PGPORT' };

    return DBI->connect( $dsn, undef, undef, { 'AutoCommit' => 0, 'PrintError' => 1, 'RaiseError' => 1 } );
}

sub get_list_of_partitions {
    my $dbh   = get_dbh();
    my $parts = $dbh->selectcol_arrayref( "
        SELECT
            c.oid::regclass
        FROM
            pg_catalog.pg_class c
            JOIN pg_catalog.pg_inherits i ON c.oid = i.inhrelid
        WHERE
            i.inhparent = 'public.plans'::regclass
            AND c.relkind = 'r'
        ORDER BY
            c.relpages DESC
    " );
    $dbh->rollback();
    $dbh->disconnect();
    return $parts;
}
